library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.2.3
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.2     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ Hmisc::src()       masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(moments)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.2.3
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.2.3
library(lubridate)
library(ggplot2)
library(reactable)
## Warning: package 'reactable' was built under R version 4.2.3
data <-read.csv("37-00049_UOF-P_2016_prepped.csv")
summary(data) #Displaying Data Summary
##  INCIDENT_DATE      INCIDENT_TIME       UOF_NUMBER         OFFICER_ID       
##  Length:2384        Length:2384        Length:2384        Length:2384       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  OFFICER_GENDER     OFFICER_RACE       OFFICER_HIRE_DATE 
##  Length:2384        Length:2384        Length:2384       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##  OFFICER_YEARS_ON_FORCE OFFICER_INJURY     OFFICER_INJURY_TYPE
##  Length:2384            Length:2384        Length:2384        
##  Class :character       Class :character   Class :character   
##  Mode  :character       Mode  :character   Mode  :character   
##  OFFICER_HOSPITALIZATION  SUBJECT_ID        SUBJECT_RACE      
##  Length:2384             Length:2384        Length:2384       
##  Class :character        Class :character   Class :character  
##  Mode  :character        Mode  :character   Mode  :character  
##  SUBJECT_GENDER     SUBJECT_INJURY     SUBJECT_INJURY_TYPE SUBJECT_WAS_ARRESTED
##  Length:2384        Length:2384        Length:2384         Length:2384         
##  Class :character   Class :character   Class :character    Class :character    
##  Mode  :character   Mode  :character   Mode  :character    Mode  :character    
##  SUBJECT_DESCRIPTION SUBJECT_OFFENSE    REPORTING_AREA         BEAT          
##  Length:2384         Length:2384        Length:2384        Length:2384       
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##     SECTOR            DIVISION         LOCATION_DISTRICT  STREET_NUMBER     
##  Length:2384        Length:2384        Length:2384        Length:2384       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  STREET_NAME        STREET_DIRECTION   STREET_TYPE       
##  Length:2384        Length:2384        Length:2384       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##  LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY     
##  Length:2384                                  Length:2384       
##  Class :character                             Class :character  
##  Mode  :character                             Mode  :character  
##  LOCATION_STATE     LOCATION_LATITUDE  LOCATION_LONGITUDE INCIDENT_REASON   
##  Length:2384        Length:2384        Length:2384        Length:2384       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  REASON_FOR_FORCE   TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
##  Length:2384        Length:2384         Length:2384         Length:2384        
##  Class :character   Class :character    Class :character    Class :character   
##  Mode  :character   Mode  :character    Mode  :character    Mode  :character   
##  TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
##  Length:2384         Length:2384         Length:2384        
##  Class :character    Class :character    Class :character   
##  Mode  :character    Mode  :character    Mode  :character   
##  TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
##  Length:2384         Length:2384         Length:2384        
##  Class :character    Class :character    Class :character   
##  Mode  :character    Mode  :character    Mode  :character   
##  TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES   FORCE_EFFECTIVE   
##  Length:2384          Length:2384        Length:2384       
##  Class :character     Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character
dt_new<-data[-1,]
dt_new[dt_new=="" | dt_new== " " | dt_new == "NULL" ] <- NA
head(dt_new)
##   INCIDENT_DATE INCIDENT_TIME    UOF_NUMBER OFFICER_ID OFFICER_GENDER
## 2      9/3/2016    4:14:00 AM         37702      10810           Male
## 3     3/22/2016   11:00:00 PM         33413       7706           Male
## 4     5/22/2016    1:29:00 PM         34567      11014           Male
## 5     1/10/2016    8:55:00 PM         31460       6692           Male
## 6     11/8/2016    2:30:00 AM  37879, 37898       9844           Male
## 7     9/11/2016    7:20:00 PM         36724       9855           Male
##   OFFICER_RACE OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 2        Black          5/7/2014                      2             No
## 3        White          1/8/1999                     17            Yes
## 4        Black         5/20/2015                      1             No
## 5        Black         7/29/1991                     24             No
## 6        White         10/4/2009                      7             No
## 7        White         6/10/2009                      7             No
##            OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 2 No injuries noted or visible                      No      46424        Black
## 3                Sprain/Strain                     Yes      44324     Hispanic
## 4 No injuries noted or visible                      No      45126     Hispanic
## 5 No injuries noted or visible                      No      43150     Hispanic
## 6 No injuries noted or visible                      No      47307        Black
## 7 No injuries noted or visible                      No      46549        White
##   SUBJECT_GENDER SUBJECT_INJURY          SUBJECT_INJURY_TYPE
## 2         Female            Yes      Non-Visible Injury/Pain
## 3           Male             No No injuries noted or visible
## 4           Male             No No injuries noted or visible
## 5           Male            Yes               Laceration/Cut
## 6           Male             No No injuries noted or visible
## 7         Female             No No injuries noted or visible
##   SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION          SUBJECT_OFFENSE
## 2                  Yes   Mentally unstable                    APOWW
## 3                  Yes   Mentally unstable                    APOWW
## 4                  Yes             Unknown                    APOWW
## 5                  Yes FD-Unknown if Armed           Evading Arrest
## 6                  Yes             Unknown Other Misdemeanor Arrest
## 7                  Yes             Unknown               Assault/FV
##   REPORTING_AREA BEAT SECTOR      DIVISION LOCATION_DISTRICT STREET_NUMBER
## 2           2062  134    130       CENTRAL               D14           211
## 3           1197  237    230     NORTHEAST                D9          7647
## 4           4153  432    430     SOUTHWEST                D6           716
## 5           4523  641    640 NORTH CENTRAL               D11          5600
## 6           2167  346    340     SOUTHEAST                D7          4600
## 7           1134  235    230     NORTHEAST                D9          1234
##    STREET_NAME STREET_DIRECTION STREET_TYPE
## 2        Ervay                N         St.
## 3     Ferguson             <NA>         Rd.
## 4 bimebella dr             <NA>         Ln.
## 5          LBJ             <NA>       Frwy.
## 6    Malcolm X                S       Blvd.
## 7        Peavy             <NA>         Rd.
##   LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY LOCATION_STATE
## 2                               211 N ERVAY ST        Dallas             TX
## 3                             7647 FERGUSON RD        Dallas             TX
## 4                             716 BIMEBELLA LN        Dallas             TX
## 5                               5600 L B J FWY        Dallas             TX
## 6                        4600 S MALCOLM X BLVD        Dallas             TX
## 7                                1234 PEAVY RD        Dallas             TX
##   LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON REASON_FOR_FORCE
## 2         32.782205         -96.797461          Arrest           Arrest
## 3         32.798978         -96.717493          Arrest           Arrest
## 4          32.73971          -96.92519          Arrest           Arrest
## 5              <NA>               <NA>          Arrest           Arrest
## 6              <NA>               <NA>          Arrest           Arrest
## 7         32.837527         -96.695566          Arrest           Arrest
##      TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## 2  Hand/Arm/Elbow Strike                <NA>                <NA>
## 3            Joint Locks                <NA>                <NA>
## 4      Take Down - Group                <NA>                <NA>
## 5         K-9 Deployment                <NA>                <NA>
## 6         Verbal Command     Take Down - Arm                <NA>
## 7 Hand Controlled Escort                <NA>                <NA>
##   TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## 2                <NA>                <NA>                <NA>
## 3                <NA>                <NA>                <NA>
## 4                <NA>                <NA>                <NA>
## 5                <NA>                <NA>                <NA>
## 6                <NA>                <NA>                <NA>
## 7                <NA>                <NA>                <NA>
##   TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## 2                <NA>                <NA>                <NA>
## 3                <NA>                <NA>                <NA>
## 4                <NA>                <NA>                <NA>
## 5                <NA>                <NA>                <NA>
## 6                <NA>                <NA>                <NA>
## 7                <NA>                <NA>                <NA>
##   TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 2                 <NA>             <NA>             Yes
## 3                 <NA>             <NA>             Yes
## 4                 <NA>             <NA>             Yes
## 5                 <NA>             <NA>             Yes
## 6                 <NA>             <NA>         No, Yes
## 7                 <NA>             <NA>             Yes
###..................................imputation........................................
# skew test for latitude
dt_new$LOCATION_LATITUDE <- as.numeric(dt_new$LOCATION_LATITUDE)
skewness(dt_new$LOCATION_LATITUDE,na.rm=TRUE)
## [1] 0.5937679
dt_new$LOCATION_LONGITUDE <- as.numeric(dt_new$LOCATION_LONGITUDE)
skewness(dt_new$LOCATION_LONGITUDE,na.rm=TRUE)
## [1] 0.2865864
dt_new$LOCATION_LATITUDE <- impute(dt_new$LOCATION_LATITUDE,mean)
dt_new$LOCATION_LONGITUDE <- impute(dt_new$LOCATION_LONGITUDE,mean)
#############....................mode imputaion on time column...........................
Mode <- function(x) {
  x <- na.omit(x)
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

cols_to_impute <- c("INCIDENT_TIME")
for (col in cols_to_impute) { 
if (sum(is.na(dt_new[[col]])) > 0) {
  mode_value <- Mode(dt_new[[col]])
  dt_new[[col]][is.na(dt_new[[col]])] <- mode_value
  }
}
##########................Groupin and summarizing performed to plot the pie chart.........................

force_used <- dt_new$TYPE_OF_FORCE_USED1
subject_injury <- dt_new$SUBJECT_INJURY == "Yes"
data1 <- data.frame(force_used, subject_injury)


summary_data <- data1 %>%
  group_by(force_used) %>%
  summarise(total_count = n(), injury_count = sum(subject_injury)) %>%
  mutate(percent_injury = injury_count / total_count * 100)

# Sort the data by percentage of injury
summary_data <- summary_data[order(summary_data$percent_injury, decreasing = TRUE),]

# Keep only the top 5 force types and combine the rest into a single "Other" category
top_force <- summary_data$force_used[1:5]
other_force <- "Other"
summary_data$force_used <- ifelse(summary_data$force_used %in% top_force, summary_data$force_used, other_force)
summary_data <- summary_data %>%
  group_by(force_used) %>%
  summarise(total_count = sum(total_count), injury_count = sum(injury_count)) %>%
  mutate(percent_injury = injury_count / total_count * 100)

###.................Pie Chart..........................

fig <- plot_ly(summary_data, labels = ~force_used, values = ~percent_injury, type = "pie",
               marker = list(colors = c("#FF00FF", "#blue", "#red", "yellow", "#brown", "#pink")),
               textinfo = "label+percent")
fig <- fig %>% layout(title = "Percentage of Subject Injury by Type of Force Used")
fig

he police have a responsibility to ensure that they do not cause unnecessary harm to individuals who are taken into custody. To achieve this, law enforcement officers are trained to use different types of force such as verbal commands, physical holds, and weapons such as tasers, batons, and pepper spray. To monitor this, police departments often collect data on the types of force used by their officers and the injuries that result. The pie chart shows that K9 deployment, pepper ball saturation, and baton strikes are responsible for a significant proportion of injuries, contributing to over 63% of injuries caused to the subject. Therefore, police departments need to minimize the usage of these types of force in order to maintain their protocols and ensure that the apprehension of suspects is carried out in the least harmful manner possible.

dt_new$TIME<-dt_new$INCIDENT_TIME
dt_new$TIME <- format(strptime(dt_new$TIME, "%I:%M:%S %p"), "%H:%M:%S")
dt_new$INC_HOUR <- substr(dt_new$INCIDENT_TIME, 0, 2)
##########...........HISTOGRAM..........................
data_time <- table(dt_new$INC_HOUR)
data_time <- as.data.frame(data_time)
names(data_time) <- c("Time", "Frequency")
data_time <- data_time %>% group_by(Time)

p <- ggplot(data_time, aes(x = Time, y = Frequency)) +
  geom_bar(stat = "identity", width = 0.8, fill = "yellow") +
  ggtitle("Counts of incidents by time") +
  xlab("Time") +
  ylab("Count if incidents") +
  theme_dark() 

# Make the plot interactive using plotly
ggplotly(p)

Crime incidents are a prevalent and unfortunate occurrence in our society, and it is essential for people to be aware and not put themselves in risky situations. To understand the trends of crime incidents happening at different times of the day, a histogram was created to display the number of incidents reported in specific time intervals in Dallas,TX,USA during the year 2016. The peak hour for crime incidents is around 5 pm in the evening, followed by 8 pm in the evening and 12 pm at noon. These findings highlight the need for people to be extra cautious during these times of the day and take necessary precautions to ensure their safety. By releasing this information, the police department can raise awareness among the public and encourage them to take proactive steps to prevent crime incidents.

#########..................VIOLIN PLOT..................
sex <- dt_new$OFFICER_GENDER
years_of_service <- as.numeric(dt_new$OFFICER_YEARS_ON_FORCE)
wdata <- data.frame(sex, years_of_service)
fig <- wdata %>%plot_ly(x = ~sex,y = ~years_of_service,type = 'violin',box = list(visible = T),
meanline = list(visible = T),
split = ~factor(sex)) 
fig <- fig %>%layout(xaxis = list(title = "Sex"),yaxis = list(title = "Years Of Service",zeroline = F))
fig

Understanding the trend between years of service and gender in a police department is important for identifying potential gender bias, providing insights into diversity and inclusivity, and developing better strategies for retention and promotion of officers.As we see from the given violent plot, maximum number of years that a woman serve for the department is about 31 years while for men it’s 36 years back in 2016,in Dallas. It indicates that men serve a bit longer than women police force.Hence female police officers are to be encouraged more to provide their contributions to the department.

############# Interactive map using leaflet##########################
loc <- dt_new%>%
group_by(LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION,LOCATION_LATITUDE,LOCATION_LONGITUDE) %>%
summarise(count = n()) %>%
arrange(desc(count))
## `summarise()` has grouped output by
## 'LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION', 'LOCATION_LATITUDE'. You can
## override using the `.groups` argument.
loc$LOCATION_LATITUDE <- as.numeric(format(loc$LOCATION_LATITUDE, nsmall = 6))
loc$LOCATION_LONGITUDE <- as.numeric(format(loc$LOCATION_LONGITUDE, nsmall = 6))
top_locations<-head(loc,20)
leaflet() %>%addProviderTiles(providers$Esri.WorldStreetMap) %>%addTerminator() %>%  
addEasyButton(easyButton(icon="fa-globe", title="Zoom to Level 1",onClick=JS("function(btn, map){ map.setZoom(1); }"))) %>%
addScaleBar() %>%setView(lng = -96.7, lat = 32.78, zoom = 10)%>%addEasyButton(easyButton(icon="fa-crosshairs", title="Locate Dallas",onClick=JS("function(btn, map){ map.setView([ 32.77666,-96.79699], zoom = 10); }")))%>%

addMiniMap(toggleDisplay = TRUE,position="bottomright") %>% 
addMarkers(data = top_locations,lat = ~LOCATION_LATITUDE,lng = ~LOCATION_LONGITUDE,popup = ~paste(LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION, "<br>No of Incidents reported: ", count)
)%>%
leaflet.extras::addSearchOSM(options = searchOptions(collapsed = TRUE))

The safety of individuals is of utmost importance, and an interactive map has been created to highlight the top 15 locations that had the most reported incidents in 2016. This map utilizes the latitude and longitude information available in the data set to pinpoint these locations on the map. 111 W COMMERCE STREET had reported the highest number of incidents approximately 22 in the year 2016 which i in close proximity with other locations such as 205 S LAMAR ST, 1100 S SAINT PAUL ST, 1600 CHESTNUT ST, and 2500 VICTORY AVE streets that also appear on the top 15 list of high volume incidents recorded. It is important for people to take responsibility for their own safety and avoid roaming around these danger spots. especially during the time of day reported through the histogram.By staying informed and being aware of the potential danger zones in their community, people can take steps to protect themselves and stay safe.

#####################Scatter Plot#######################
df <- data.frame(BEAT = 1:1000, OFFICER_YEARS_ON_FORCE = rnorm(1000, 10, 5))

# Group BEAT numbers into intervals of 20
df$BEAT_GROUP <- cut(df$BEAT, breaks = seq(0, 1000, by = 20))


# Filter out missing values
dt_new_filtered <- df[!is.na(df$BEAT) & !is.na(df$OFFICER_YEARS_ON_FORCE),]

# Create the scatter plot
scatterplt<- ggplot(dt_new_filtered, aes(x = BEAT, y = OFFICER_YEARS_ON_FORCE)) +
  geom_point() +
  geom_smooth(method=lm)+
  labs(y = "Officer Years on Force", x = "Beat") +
  ggtitle("Scatter Plot of Officer Years on Force vs. Beat") +
  theme_dark()

ggplotly(scatterplt)
## `geom_smooth()` using formula = 'y ~ x'

The scatter plot created between BEAT and officer years on force for the particular BEAT gives us valuable insights into the distribution of experienced officers across different BEATS . This is because, in general, beats with a higher number of incidents reported in the previous years require more experienced officers to handle the situation effectively.To ensure that the plot is easy to interpret, the beats were grouped into four categories based on the number of incidents reported. These categories were 0-250, 250-500, 500-750, and 750-1000. The trend line on the plot created by geom smooth suggests that the amount of experienced officers and least experienced officers required to monitor are distributed equally around the beats. However, a more detailed visual analysis revealed that officers with more than 25+ years of experience are more likely to handle the beats in the 500-750 category on their own, indicating that this category may require more experienced officers than the others. Overall, the scatter plot and the analysis provide valuable insights into the distribution of experienced officers across different beats, which can inform police department decisions on officer deployment and resource allocation.

###########################TIME SERIES PLOT###############################################
dt_new <- dt_new %>% mutate(INCIDENT_TIME = if_else(INCIDENT_TIME == "NULL", "00:00:00", INCIDENT_TIME))
dt_new <- dt_new %>% mutate(INCIDENT_DATETIME = mdy_hms(paste(INCIDENT_DATE, INCIDENT_TIME)))

filtered_dt_new <- dt_new %>% drop_na(INCIDENT_DATETIME)
filtered_dt_new <- filtered_dt_new %>% mutate(Months = floor_date(INCIDENT_DATETIME, "month"))

injury_data <- filtered_dt_new %>% filter(OFFICER_INJURY == "Yes")
injury_per_month <- injury_data %>%
  group_by(Months) %>%
  summarise(injuries = n())

injury_per_month$Months<- as.Date(injury_per_month$Months)

time_series <- ggplot(injury_per_month, aes(x = Months, y = injuries)) +
  geom_line(color = "darkblue",size=2) +
  geom_point(color = "orange",size=3) +
  theme_dark() +
  scale_x_date(labels = scales::date_format("%b %Y"), breaks = scales::date_breaks("1 months")) +
  scale_y_continuous(limits = c(10, 140)) +
  labs(x = "Month range",
       y = "Number of Officer Injured",
       title = "Subject officer by Month")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the plot
ggplotly(time_series)

There are several consequences/implications that can be performed on understanding the trend between the number of officers injured in a month in a police department. Firstly, this evaluation can lead to more training to police officers on how to handle the situations without getting injured and this number of injuries over time can bring out some changes in the existing safety policies and procedures.As we plot this time series graph between month and the count of officers injured every month in the year 2016, we observe that above a certain percentage officers from the start of the Year till June gets injured with some variations in numbers each month, however, the percentage of injury experienced at the end of the year is far less than the start. This gives us a basic idea about the count/ severity of the crimes during these two quarters dealt by the officers. Presumably, the first quarter could have reported more number of crimes than the second but their intensity/severity is unknown.. Additionally, this data can be used to inform decision-making regarding resource allocation and deployment of officers.

##########################TWO-WAY TABLE########################3

table_data <- dt_new %>% 
  group_by(SUBJECT_OFFENSE, SUBJECT_WAS_ARRESTED) %>% 
  summarise(count = n()) %>% 
  pivot_wider(names_from = SUBJECT_WAS_ARRESTED, values_from = count) %>% 
  replace(is.na(.), 0)
## `summarise()` has grouped output by 'SUBJECT_OFFENSE'. You can override using
## the `.groups` argument.
reactable(table_data,
  columns = list(SUBJECT_OFFENSE = colDef(align = "left"),
    `No` = colDef(align = "center", width = 120),
    `Yes` = colDef(align = "center", width = 120)
  ),
  bordered = TRUE,
  striped = TRUE
)

The primary responsibility of the petrol department is to arrest all the crime doers to reduce the occurrence of criminal activities. Also it is important that severe crimes are taken immediate actions, if not it can become a threat to the society. This is perhaps one of the reason for us to create a table that displays the count of number of arrests that were made and the number that weren’t for every offense types. This table can be used to evaluate the effectiveness of law enforcements. Also it says which offenses are more likely to end up in arrest and which ones need additional resources to arrest the crime doer. Here we can see that the crime type APOWW has occurred a lot of times and almost 90% of the arrests were made while it as well tops in the count of No arrest made list with 12. Except this, as we see through the table almost arrests were made for all the type of crimes and no arrest action was pending. This shows the effectiveness of the Police Department in the Texas at their law enforcements.

df_filtered_subject_gender <- dt_new %>% filter(SUBJECT_GENDER != "NULL")
df_filtered_data <- df_filtered_subject_gender[!is.na(df_filtered_subject_gender$SUBJECT_GENDER) & !is.na(df_filtered_subject_gender$TYPE_OF_FORCE_USED1), ]

contingency_subject_force_table <- table(df_filtered_data$SUBJECT_GENDER, df_filtered_data$TYPE_OF_FORCE_USED1)

contingency_subject_force_table <- contingency_subject_force_table[-which(rowSums(contingency_subject_force_table) <= 5), ]
contingency_subject_force_table <- contingency_subject_force_table[, -which(colSums(contingency_subject_force_table) <= 5)]

# Perform the chi-squared test
chi_squared_res <- chisq.test(contingency_subject_force_table)
## Warning in chisq.test(contingency_subject_force_table): Chi-squared
## approximation may be incorrect
# Display the results
chi_squared_res
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_subject_force_table
## X-squared = 98.134, df = 20, p-value = 2.713e-12
subject_gender <- dt_new %>% filter(SUBJECT_GENDER != "NULL")
subject_offense_data <- subject_gender[!is.na(subject_gender$SUBJECT_GENDER) & !is.na(subject_gender$SUBJECT_OFFENSE), ]

contingency_subject_offense <- table(subject_offense_data$SUBJECT_GENDER, subject_offense_data$SUBJECT_OFFENSE)

contingency_subject_offense <- contingency_subject_offense[-which(rowSums(contingency_subject_offense) <= 5), ]
contingency_subject_offense_table <- contingency_subject_offense[, -which(colSums(contingency_subject_offense) <= 5)]

# Perform the chi-squared test
chi_squared_result <- chisq.test(contingency_subject_offense_table)
## Warning in chisq.test(contingency_subject_offense_table): Chi-squared
## approximation may be incorrect
chi_squared_result
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_subject_offense_table
## X-squared = 120.07, df = 44, p-value = 5.58e-09
# Convert the table object to a data frame
contingencydt <- as.data.frame(contingency_subject_offense_table)
contingencydt
##      Var1                                                Var2 Freq
## 1  Female                                               APOWW  116
## 2    Male                                               APOWW  234
## 3  Female                                             Assault    4
## 4    Male                                             Assault   25
## 5  Female                                          Assault/FV   20
## 6    Male                                          Assault/FV   72
## 7  Female                  Assault/FV, Assault/Public Servant    0
## 8    Male                  Assault/FV, Assault/Public Servant    7
## 9  Female        Assault/FV, Evading Arrest, Resisting Arrest    1
## 10   Male        Assault/FV, Evading Arrest, Resisting Arrest    5
## 11 Female                     Assault/FV, Other Felony Arrest    0
## 12   Male                     Assault/FV, Other Felony Arrest    8
## 13 Female                        Assault/FV, Resisting Arrest    5
## 14   Male                        Assault/FV, Resisting Arrest   22
## 15 Female                            Assault/FV, Warrant/Hold    0
## 16   Male                            Assault/FV, Warrant/Hold    8
## 17 Female                              Assault/Public Servant    9
## 18   Male                              Assault/Public Servant   38
## 19 Female                Assault/Public Servant, Warrant/Hold    1
## 20   Male                Assault/Public Servant, Warrant/Hold    6
## 21 Female                               Burglary of a Vehicle    0
## 22   Male                               Burglary of a Vehicle    7
## 23 Female                                 Burglary/Habitation    4
## 24   Male                                 Burglary/Habitation   20
## 25 Female                             Crim Trespass/Bldg/Prop    2
## 26   Male                             Crim Trespass/Bldg/Prop   18
## 27 Female           Crim Trespass/Bldg/Prop, Resisting Arrest    2
## 28   Male           Crim Trespass/Bldg/Prop, Resisting Arrest   10
## 29 Female                                  Disorderly Conduct    8
## 30   Male                                  Disorderly Conduct   12
## 31 Female                Disorderly Conduct, Resisting Arrest    2
## 32   Male                Disorderly Conduct, Resisting Arrest    4
## 33 Female                       Drug Possession - Misdemeanor    0
## 34   Male                       Drug Possession - Misdemeanor   11
## 35 Female                                                 DWI    1
## 36   Male                                                 DWI   14
## 37 Female                               DWI, Resisting Arrest    4
## 38   Male                               DWI, Resisting Arrest    5
## 39 Female                                      Evading Arrest    2
## 40   Male                                      Evading Arrest   32
## 41 Female       Evading Arrest, Drug Possession - Misdemeanor    0
## 42   Male       Evading Arrest, Drug Possession - Misdemeanor    8
## 43 Female                        Evading Arrest, Warrant/Hold    0
## 44   Male                        Evading Arrest, Warrant/Hold   17
## 45 Female                                           No Arrest   48
## 46   Male                                           No Arrest  255
## 47 Female                                 Other Felony Arrest    4
## 48   Male                                 Other Felony Arrest   19
## 49 Female                            Other Misdemeanor Arrest   10
## 50   Male                            Other Misdemeanor Arrest   19
## 51 Female          Other Misdemeanor Arrest, Resisting Arrest    0
## 52   Male          Other Misdemeanor Arrest, Resisting Arrest    6
## 53 Female                                 Public Intoxication   38
## 54   Male                                 Public Intoxication  143
## 55 Female         Public Intoxication, Assault/Public Servant    3
## 56   Male         Public Intoxication, Assault/Public Servant    4
## 57 Female  Public Intoxication, Drug Possession - Misdemeanor    0
## 58   Male  Public Intoxication, Drug Possession - Misdemeanor   10
## 59 Female               Public Intoxication, Resisting Arrest    4
## 60   Male               Public Intoxication, Resisting Arrest   27
## 61 Female Public Intoxication, Resisting Arrest, Warrant/Hold    0
## 62   Male Public Intoxication, Resisting Arrest, Warrant/Hold    7
## 63 Female Public Intoxication, Resisting Search and Transport    1
## 64   Male Public Intoxication, Resisting Search and Transport    5
## 65 Female                                    Resisting Arrest    4
## 66   Male                                    Resisting Arrest    9
## 67 Female                      Resisting Arrest, Warrant/Hold    3
## 68   Male                      Resisting Arrest, Warrant/Hold   10
## 69 Female                      Resisting Search and Transport    1
## 70   Male                      Resisting Search and Transport    9
## 71 Female                                             Robbery    0
## 72   Male                                             Robbery    7
## 73 Female                             Robbery, Evading Arrest    0
## 74   Male                             Robbery, Evading Arrest    8
## 75 Female                                               Theft    1
## 76   Male                                               Theft    5
## 77 Female                                   Traffic Violation    0
## 78   Male                                   Traffic Violation    9
## 79 Female             Unauthorize Use Vehicle, Evading Arrest    0
## 80   Male             Unauthorize Use Vehicle, Evading Arrest    8
## 81 Female                                        Warrant/Hold   16
## 82   Male                                        Warrant/Hold   94
## 83 Female         Warrant/Hold, Drug Possession - Misdemeanor    0
## 84   Male         Warrant/Hold, Drug Possession - Misdemeanor    9
## 85 Female                        Warrant/Hold, Evading Arrest    0
## 86   Male                        Warrant/Hold, Evading Arrest    8
## 87 Female                      Warrant/Hold, Resisting Arrest   11
## 88   Male                      Warrant/Hold, Resisting Arrest    8
## 89 Female        Warrant/Hold, Resisting Search and Transport    1
## 90   Male        Warrant/Hold, Resisting Search and Transport   10
# Display the results
correlation_heatmap <- ggplot(contingencydt, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "white", high = "RED") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Heat Map of Subject offense vs Subject Gender",
       x = "Subject Gender",
       y = "Subject Offense",
       fill = "Count")

ggplotly(correlation_heatmap)

Since correlation analysis had to be performed between two continous variable, we have performed the chi squared test for two categorical variable Subbect_offense and Subject gender. The obtained chi squared result is: data: contingency_subject_force_table X-squared = 98.134, df = 20, p-value = 2.713e-12 and the heatmap is displayed. With the results obtained we can confirm that the two variables are highly correlated with each other.

Reference Websites Utilized for the project: 1. KAGGLE 2. DATACAMP 3. CRAN 4.R-Bloggers 5.Rpubs